import com.fiehn.filter.FilterFactory
import resolver.analyzer.Analyzer
import resolver.analyzer.SimpleTextAnalyzer

/**
 * Unit tests for {@link SimpleTextAnalyzer}.
 *
 * Each test feeds text to {@code analyze(...)} and checks that the returned set
 * contains the expected compound names. Two of the tests additionally print
 * false-negative / false-positive diagnostics, partly formatted as a SQL
 * {@code in (...)} query so the hits can be checked against a synonym table by hand.
 */
class SimpleTextAnalyzerTests extends GroovyTestCase {

  /** SQL prefix printed in front of the diagnostic name lists. */
  private static final String SYNONYM_QUERY_HEAD =
    "select distinct lower(name) from synonym where lower(name) in ("

  /** No fixture of its own; only delegates to the base class. */
  protected void setUp() {
    super.setUp()
  }

  /** No cleanup of its own; only delegates to the base class. */
  protected void tearDown() {
    super.tearDown()
  }

  /**
   * Splits a multi-line block into its trimmed, non-blank lines.
   * Order and duplicates are preserved so that per-line statistics stay meaningful.
   */
  private static List<String> nonBlankLines(String block) {
    List<String> lines = new ArrayList<String>()
    for (String line : block.split("\n")) {
      String trimmed = line.trim()
      if (trimmed.size() > 0) {
        lines.add(trimmed)
      }
    }
    return lines
  }

  /**
   * Lower-cases a name and wraps it in single quotes for the diagnostic SQL output.
   * Embedded single quotes are doubled. A literal replace is used on purpose:
   * with replaceAll a replacement of backslash-quote collapses to a bare quote,
   * so nothing would be escaped.
   */
  private static String sqlQuote(String name) {
    return "'" + name.toLowerCase().replace("'", "''") + "'"
  }

  /**
   * Asserts that every expected name is part of the analyzer result; the failure
   * message names the first missing entry.
   */
  private void assertContainsAll(Set<String> result, List<String> expected) {
    for (String name : expected) {
      assertTrue("analyzer result does not contain '" + name + "'", result.contains(name))
    }
  }

  /**
   * Analyzes a short encyclopedic paragraph about glucose and expects the
   * compound names and a few plain nouns to be reported.
   */
  void testAnalyzeGenericTest() {
    String document = """

  Glucose (Glc), a monosaccharide (or simple sugar) also known as grape sugar, blood sugar, or corn sugar, is a very important carbohydrate in biology. The living cell uses it as a source of energy and metabolic intermediate. Glucose is one of the main products of photosynthesis and starts cellular respiration in both prokaryotes (bacteria and archaea) and eukaryotes (animals, plants, fungi, and protists).
The name "glucose" comes from the Greek word glukus (??????), meaning "sweet", and the suffix "-ose," which denotes a sugar.
Two stereoisomers of the aldohexose sugars are known as glucose, only one of which (D-glucose) is biologically active. This form (D-glucose) is often referred to as dextrose monohydrate, or, especially in the food industry, simply dextrose (from dextrorotatory glucose[2]). This article deals with the D-form of glucose. The mirror-image of the molecule, L-glucose, cannot be metabolized by cells in the biochemica
  """
    Analyzer ana = new SimpleTextAnalyzer()

    Set<String> result = ana.analyze(document)

    assertContainsAll(result, [
        "glucose",
        "aldohexose",
        "D-glucose",
        "Glc",
        "dextrorotatory",
        "animals",
        "bacteria",
        "fungi"
    ])
  }


  /**
   * Analyzes free text containing systematic names with commas, brackets,
   * primes and embedded spaces, plus a name wrapped in parentheses.
   */
  void testAnalyzeMoreComplexTest() {
    String text = """
 bunch of rumble to find 1,3-Diaminopropane in D-Glucose and 1,1,1,2,2,3,3,4,4-nonafluoro-4-(1,1,2,2,3,3,4,4,4-nonafluorobutoxy)butane.
 It's also nice to have 3,3'-Oxybis(1-propene) or (R)-3-Hydroxybutyric acid. Last bot not least I'm a huge fan or 3,9-divinyl-2,4,8,10-tetraoxaspiro[5.5]undecane.
 Also it's a great feeling if we can find (glucose) in brakets without finding statment like (help i'm surrounded by brackets).
"""

    Analyzer ana = new SimpleTextAnalyzer()

    Set<String> result = ana.analyze(text)

    assertContainsAll(result, [
        "1,3-Diaminopropane",
        "D-Glucose",
        "1,1,1,2,2,3,3,4,4-nonafluoro-4-(1,1,2,2,3,3,4,4,4-nonafluorobutoxy)butane",
        "3,3'-Oxybis(1-propene)",
        "(R)-3-Hydroxybutyric acid",
        "3,9-divinyl-2,4,8,10-tetraoxaspiro[5.5]undecane",
        "glucose"
    ])
  }


  /**
   * Analyzes a list with (mostly) one synonym per line, prints which lines were
   * missed and which results are not lines of the input, and asserts on a
   * hand-picked subset of names.
   */
  void testAnalyzeSynonymsTest() {

    //a couple of synonyms to test
    String synonyms = """

1-Methylhistidine
1,3-Diaminopropane
2-Ketobutyric acid
2-Hydroxybutyric acid
2-Methoxyestrone
(R)-3-Hydroxybutyric acid
Deoxyuridine
Deoxycytidine
Cortexolone
Deoxycorticosterone
2-methoxy-12-methyloctadec-17-en-5-ynoyl anhydride
N-(3S-hydroxydecanoyl)-L-serine
N-(3-(hexadecanoyloxy)-heptadecanoyl)-L-ornithine
N-(9Z,12Z,15Z-octadecatrienoyl)-glutamine
N-(3-(15-methyl-hexadecanoyloxy)-13-methyl-tetradecanoyl)-L-serine
2-((2S)-6-amino-2-(3-hydroxy-14-methylpentadecanamido)hexanoyloxy)ethyl-2-hydroxy-13-methyltetradecanoate
N-hydroxydecanamide
(9S,10S)-10-hydroxy-9-(phosphonooxy)octadecanoic acid
6-(6-aminohexanamido)hexanoic acid
hexadecanoic acid
ethanoic acid
propanoic acid
butanoic acid
pentanoic acid
hexanoic acid
heptanoic acid
octanoic acid
nonanoic acid
decanoic acid
undecanoic acid
dodecanoic acid
tridecanoic acid
tetradecanoic acid
pentadecanoic acid
heptadecanoic acid
octadecanoic acid
nonadecanoic acid
eicosanoic acid
heneicosanoic acid
docosanoic acid
tricosanoic acid
tetracosanoic acid
pentacosanoic acid
hexacosanoic acid
heptacosanoic acid
octacosanoic acid
nonacosanoic acid
triacontanoic acid
hentriacontanoic acid
dotriacontanoic acid
tritriacontanoic acid
tetratriacontanoic acid
pentatriacontanoic acid
hexatriacontanoic acid
heptatriacontanoic acid
octatriacontanoic acid
hexatetracontanoic acid
methanoic acid
2-hydroxy-2-methyl-propanoic acid
17-methyl-6Z-octadecenoic acid
6-methyl-octanoic acid
7-methyl-octanoic acid
10-methyl-undecanoic acid
10-methyl-dodecanoic acid
11-methyl-dodecanoic acid
12-methyl-tridecanoic acid
12-methyl-tetradecanoic acid
13-methyl-tetradecanoic acid
14-methy-pentadecanoic acid
14-methyl-hexadecanoic acid
15-methyl-hexadecanoic acid
10-methyl-heptadecanoic acid
16-methyl-heptadecanoic acid
10-methyl-octadecanoic acid
16-methyl-octadecanoic acid
18-methyl-nonadecanoic acid
(+)-18-methyl-eicosanoic acid
20-methyl-heneicosanoic acid
(+)-20-methyl-docosanoic acid
22-methyl-tricosanoic acid
3,13,19-trimethyl-tricosanoic acid
23-methyl-tetracosanoic acid
24-methyl-pentacosanoic acid
(+)-24-methyl-hexacosanoic acid
26-methyl-heptacosanoic acid
2,4,6-trimethyl-octacosanoic acid
(+)-28-methyl-triacontanoic acid
2-methyl-2Z-butenoic acid
2-methyl-2E-butenoic acid
4-methyl-3-pentenoic acid
2,4,6-trimethyl-2Z-tetracosenoic acid
2,6-dimethyl-nonadecanoic acid
2,6-dimethyl-undecanoic acid
2,6-dimethyl-dodecanoic acid
4,8-dimethyl-dodecanoic acid
4,12-dimethyl-tridecanoic acid
2,6-dimethyl-tetradecanoic acid
2,8-dimethyl-tetradecanoic acid
2,6-dimethyl-pentadecanoic acid
4,8-dimethyl-pentadecanoic acid
vitamin a
vitamine a
(1R,4aS,7S,7aR)-4,7-dimethyl-1,4a,5,6,7,7a-hexahydrocyclopenta[c]pyran-1-ol

(+)-3-Carene(+)-Delta(3)-carene(1S)-(+)-3-carene(1S)-3,7,7-trimethylbicyclo[4.1.0]hept-3-ene(1S,6R)-(+)-3-carene(S)-(+)-3-carene
(1S,6R)-3,7,7-trimethylbicyclo[4.1.0]hept-3-ene1alpha,6alpha-car-3-ene
(1R,2S,5R)-2-methyl-5-[(1R)-1-methyl-2-oxoethyl]cyclopentanecarbaldehyde


  """

    Analyzer ana = new SimpleTextAnalyzer()

    Set<String> result = ana.analyze(synonyms)

    // split and trim the input once; both reports below compare against the same names
    List<String> expected = nonBlankLines(synonyms)
    Set<String> known = new HashSet<String>(expected)

    println "false negative"
    for (String name : expected) {
      if (!result.contains(name)) {
        println "negative: ${name}"
      }
    }
    println "-----------"

    println "false positive"
    for (String hit : result) {
      if (!known.contains(hit)) {
        println "positive: ${hit}"
      }
    }
    println "-----------"

    //our checks
    assertContainsAll(result, [
        "(R)-3-Hydroxybutyric acid",
        "1-Methylhistidine",
        "1,3-Diaminopropane",
        "2-Ketobutyric acid",
        "2-Hydroxybutyric acid",
        "2-Methoxyestrone",
        "Deoxyuridine",
        "Cortexolone",
        "Deoxycorticosterone",
        "6-(6-aminohexanamido)hexanoic acid",
        "N-(3S-hydroxydecanoyl)-L-serine",
        "N-(3-(hexadecanoyloxy)-heptadecanoyl)-L-ornithine",
        "N-(3-(15-methyl-hexadecanoyloxy)-13-methyl-tetradecanoyl)-L-serine",
        "N-hydroxydecanamide",
        "2-((2S)-6-amino-2-(3-hydroxy-14-methylpentadecanamido)hexanoyloxy)ethyl-2-hydroxy-13-methyltetradecanoate",
        "2-methoxy-12-methyloctadec-17-en-5-ynoyl anhydride"
    ])
  }


  /**
   * Analyzes a large list with one synonym per line and requires that fewer
   * than 40 percent of the non-blank lines are missing from the result.
   * False positives are only reported (as a SQL query), not asserted on.
   */
  void testAnalyzeManySynonymsEachLineASynonymTest() {

    //a couple of synonyms to test
    String synonyms = """
(+)-a-Pinene
(2-chloro-7,7-dimethylbicyclo[2.2.1]hept-1-yl)acetic acid
(2-hydroxy-4-methoxyphenyl)(phenyl)methanone
(3-amino-4-chlorophenyl)(hydroxy)azane oxide
(5-amino-2-methylphenyl)(hydroxy)azane oxide
(a-D-mannosyl)2-b-D-mannosyl-N-acetylglucosamine
(acetyloxy)(phenyl)methyl acetate
.alpha.-Hydroxypropionitrile
.alpha.-Naphthylamine-8-sulfonic acid
.beta.,.beta.'-Bis(cyanoethyl) ether
.beta.,.beta.'-Iminodipropionitrile
.beta.,.beta.'-Oxydipropionitrile
.beta.-Alaninol
.beta.-Ethoxypropionitrile
.gamma.-(Diethylamino)propylamine
.gamma.-Aminopropanol
.gamma.-Glutamic acid hydrazide
.gamma.-Glutamyl hydrazide
1,1'-Binaphthyl-8,8'-dicarboxylic acid
1,2,4,5,8-pentahydroxyanthra-9,10-quinone
1,2,4-triphenyl-1,4-butanedione
1,2-di-(2E,4E-hexadienoyl)-sn-glycero-3-phosphocholine
1,2-di-(2E,4E-octadienoyl)-sn-glycero-3-phosphocholine
1,2-diheptanoyl-sn-glycero-3-phosphocholine
1,2-dihexanoyl-sn-glycero-3-phosphocholine
1,2-dinonanoyl-sn-glycero-3-phosphocholine
1,2-dioctanoyl-sn-glycero-3-phosphocholine
1,2-divaleryl-sn-glycero-3-phosphocholine
1,4-bis(octyloxy)-1,4-dioxo-2-butanesulfonic acid
1,4-dioxo-1,4-bis(tridecyloxy)-2-butanesulfonic acid
1,7,7-trimethylbicyclo[2.2.1]hept-2-yl 3-phenylacrylate
1,7-Diamino-4-azaheptane
1-(3E,5E-hexadienoyl)-2-(11E,13E-tetradecadienoyl)-sn-glycero-3-phosphocholine
1-(7Z-hexadecenoyl)-2-(4Z,7Z,10Z,13Z,16Z,19Z-docosahexaenoyl)-sn-glycero-3-phosphocholine
1-(7Z-hexadecenoyl)-2-(5Z,8Z,11Z,14Z-eicosatetraenoyl)-sn-glycero-3-phosphocholine
1-(Diethylamino)propylamine-3
1-Amino-3-(diethylamino)propane
1-Amino-3-hydroxypropane
1-Amino-3-propanol
1-Amino-8-naphthalene sulfonate
1-Aminonaphthalene-8-sulfonate
1-Aminonaphthalene-8-sulfonic acid
1-Methyl-2-aldoximinopyridinium iodide
1-Methyl-2-hydroxyiminomethylpyridinium iodide
1-Naphthylamine-8-sulfonic acid
1-decyl-2-acetyl-sn-glycero-3-phosphocholine
1-dodecyl-2-acetyl-sn-glycero-3-phosphocholine
1-dodecyl-2-acetyl-sn-glycero-3-phosphocholine
1-dodecyl-2-decanoyl-sn-glycero-3-phosphocholine
1-eicosanoyl-2-(9Z,12Z-octadecadienoyl)-sn-glycero-3-phosphocholine
1-heptanoyl-2-heneicosanoyl-sn-glycero-3-phosphocholine
1-heptanoyl-2-heptanoyl-sn-glycero-3-phosphocholine
1-heptanoyl-2-octanoyl-sn-glycero-3-phosphocholine
1-hexadecyl-2-(9Z-octadecenoyl)-sn-glycero-3-phosphocholine
1-hexanoyl-2-heptanoyl-sn-glycero-3-phosphocholine
1-hexanoyl-2-hexadecanoyl-sn-glycero-3-phosphocholine
1-hexanoyl-2-hexanoyl-sn-glycero-3-phosphocholine
1-hexanoyl-2-octadecanoyl-sn-glycero-3-phosphocholine
1-hexanoyl-2-octanoyl-sn-glycero-3-phosphocholine
1-hexanoyl-2-octanoyl-sn-glycero-3-phosphocholine
1-methyl-1lambda~5~-pyridine-2-carbaldehyde oxime
1-methyl-2-((2-methyl-2-propenyl)oxy)-2-oxoethyl 2-(acetyloxy)-2-methylpropanoate
1-methyl-2-acetyl-sn-glycero-3-phosphocholine
1-methyl-2-acetyl-sn-glycero-3-phosphocholine
1-methyl-2-hexadecanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-decanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-docosanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-dodecanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-eicosanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-heneicosanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-heptadecanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-nonadecanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-nonanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-octadecanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-pentadecanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-tetracosanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-tetradecanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-tricosanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-tridecanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-undecanoyl-sn-glycero-3-phosphocholine
1-nonanoyl-2-valeryl-sn-glycero-3-phosphocholine
1-octanoyl-2-eicosanoyl-sn-glycero-3-phosphocholine
1-octanoyl-2-heneicosanoyl-sn-glycero-3-phosphocholine
1-octanoyl-2-heptadecanoyl-sn-glycero-3-phosphocholine
1-octanoyl-2-heptanoyl-sn-glycero-3-phosphocholine
1-octanoyl-2-hexanoyl-sn-glycero-3-phosphocholine
1-octanoyl-2-hexanoyl-sn-glycero-3-phosphocholine
1-octanoyl-2-nonadecanoyl-sn-glycero-3-phosphocholine
1-octanoyl-2-octadecanoyl-sn-glycero-3-phosphocholine
1-octanoyl-2-octanoyl-sn-glycero-3-phosphocholine
1-octanoyl-2-valeryl-sn-glycero-3-phosphocholine
1-tetradecanoyl-2-eicosanoyl-sn-glycero-3-phosphocholine
1-tetradecyl-2-(9Z-hexadecenoyl)-sn-glycero-3-phosphocholine
1-tetradecyl-2-hexadecanoyl-sn-glycero-3-phosphocholine
1-tetradecyl-2-hexadecanoyl-sn-glycero-3-phosphocholine
1-tetradecyl-2-pentadecanoyl-sn-glycero-3-phosphocholine
1-valeryl-2-hexadecanoyl-sn-glycero-3-phosphocholine
1-valeryl-2-valeryl-sn-glycero-3-phosphocholine
10-formyldihydrofolate
2,2'-(1,4-Anthraquinonylenediimino)bis(5-methyl-benzenesulfonic acid disodium salt
2,2'-Methylenebis(4-ethyl-6-tert-butylphenol)
2,3-Diketo-L-gulonate
2,3-diheptanoyl-sn-glycero-1-phosphocholine
2,3-dihexanoyl-sn-glycero-1-phosphocholine
2,3-dioctanoyl-sn-glycero-1-phosphocholine
2,4-Dithiobiuret
2-((4-(dipropylamino)phenyl)diazenyl)benzoic acid
2-(Hydroximinomethyl)-1-methylpyridinium iodide
2-(benzyloxy)ethyl 2-hydroxypropanoate
2-(hexyloxy)ethyl 2-(((hexyloxy)carbonyl)oxy)propanoate
2-(hexyloxy)ethyl 2-hydroxypropanoate
2-(pentylthio)succinic acid
2-Amino-4-nitrobenzoic acid
2-Chloro-5-nitroaniline
2-Cyclohexyl-4,6-dinitrophenol
2-Decaprenyl-3-methyl-6-methoxy-1,4-benzoquinone
2-Formyl-1-methylpyridinium iodide oxime
2-Formyl-N-methylpyridinium oxime iodide
2-Hydroxy-4-methoxybenzophenone
2-Hydroxyiminomethyl-1-methylpyridinium iodide
2-Hydroxypropanenitrile
2-Hydroxypropionitrile
2-Nitro-4-aminotoluene
2-PAM
2-PAM iodide
2-Pam
2-Pam iodide
2-Propenamide
2-Pyridine aldoxymethiodide
2-Pyridinealdoxime methiodide
2-Pyridinecarboxaldehyde aldoxime methiodide
2-Pyridylaldoxime methiodide
2-amino-4-(hydroxy(oxido)amino)benzoic acid
2-butoxyethyl 2-(((pentyloxy)carbonyl)oxy)propanoate
2-cyclohexyl-4,6-bis(hydroxy(oxido)amino)phenol
2-ethylhexyl 2-hydroxypropanoate
2-hydrazino-3-methylpentanoic acid
2-hydrazino-3-phenylpropanoic acid
2-hydroxy-5-((3-(hydroxy(oxido)amino)phenyl)diazenyl)benzoic acid
2-hydroxypropanenitrile
2-methyl-5-((5-(4-methyl-3-sulfoanilino)-9,10-dioxo-9,10-dihydro-1-anthracenyl)amino)benzenesulfonic acid
2-phenoxyethyl 2-(acetyloxy)propanoate
2-phenoxyethyl 2-hydroxypropanoate
2246
3',3'',5', 5''-Tetrabrom-m-cresolsulfonephthalein
3',3'',5', 5''-Tetrabromo-m-cresolsulfonephthalein
3',3'',5', 5''-Tetrabromophenolsulfonephthalein
3, 3'-Diaminodipropylamine
3, 3'-Dibromothymolsulfonphthalein
3, 3'-Iminobispropionitrile
3, 3'-Iminodipropanenitrile
3, {3'-Iminobis[propylamine]}
3,3',5,5'-Tetrabromophenolsulfonphthalein
3,3'-Iminobis(propylamine)
3,3'-Iminodipropionitrile
3,3'-Oxydipropionitrile
3,4-dihydroxy-9,10-dioxo-9,10-dihydro-2-anthracenesulfonic acid
3-(Diethylamino)-1-propylamine
3-(Diethylamino)propylamine
3-(Isopropylamino)propionitrile
3-(N, N-Diethylamino)-1-propylamine
3-(isopropylamino)propanenitrile
3-Amino-1-propanol
3-Aminopropanol
3-Aminopropyl alcohol
3-Ethoxypropionitrile
3-Hydroxypropylamine
3-Indoleacetonitrile
3-Mercaptolactate-cysteine disulfide
3-Nitro-4-methylaniline
3-Oxohexadecanoyl-CoA
3-Oxooctadecanoyl-CoA
3-Propanolamine
3-amino-1-propanol
3-amino-4-((4'-((1-amino-4-sulfo-2-naphthyl)diazenyl)-3,3'-dimethyl[1,1'-biphenyl]-4-yl)diazenyl)-2,7-naphthalenedisulfonic acid
3-ethoxypropanenitrile
4,4'-Dihydroxyfuchsone
4,6-Dinitro-o-cyclohexylphenol
4-((5-((2,4-diamino-5-methylphenyl)diazenyl)-2-methylphenyl)diazenyl)-6-methyl-1,3-benzenediamine
4-(bis(4-hydroxyphenyl)methylene)-2,5-cyclohexadien-1-one
4-Amino-5-imidazolecarboxamide
4-Aminoimidazole-5-carboxamide
4-Azaheptane-1,7-diamine
4-Carbamoyl-5-aminoimidazole
4-Carboxamido-5-aminoimidazole
4-Hydroxy tolbutamide
4-Hydroxy-3,4'-azodi-1-naphthalenesulfonic acid, disodium salt
4-Hydroxydebrisoquine
4-Methoxy-2-hydroxybenzophenone
4-Nitroanthranilic acid
4-Nitrophenyl sulfate
4-amino-1H-pyrazolo[3,4-d]pyrimidin-6-yl hydrosulfide
4-amino-1H-pyrazolo[3,4-d]pyrimidine-6-thiol
4-amino-1H-pyrazolo[3,4-d]pyrimidine-6-thiol 4-amino-1H-pyrazolo[3,4-d]pyrimidin-6-yl hydrosulfide
4-anilinobenzenesulfonic acid
4-hydroxybenzoyl-CoA
5-Aminoimidazol-4-carboxamide
5-Methyldihydrofolic acid
5-Nitro-4-toluidine
5-amino-1H-imidazole-4-carboxamide
5-hydrazino-5-oxonorvaline
5-methyl-2-((4-(4-methyl-2-sulfoanilino)-9,10-dioxo-9,10-dihydro-1-anthracenyl)amino)benzenesulfonic acid
6-Chloro-3-nitroaniline
6-Cicloesil-2,4-dinitr-fenolo(ITALIAN)
6-Cyclohexyl-2, 4-dinitrophenol
6-heptadecyl-1,3,5-triazine-2,4-diamine
6-hydroxy-5-((4-methylphenyl)diazenyl)-2-naphthalenesulfonic acid
8-Amino-1-naphthalenesulfonic acid
8-amino-1-naphthalenesulfonic acid
8-iso-13,14-dihydro-15-keto-PGF2a
9,10-Anthracenedione, 1,2,4,5,8-pentahydroxy-
A-22-46
ADP-Mannose
ADP-glucose
AIC
AICA
Acetocyanohydrin
Acrylic amide
Airedale Carmoisine
Albutest
Alizarinsulfonate
Alterungsschutzmittel BKF
Amidoschwarz
Aminobis(propylamine)
Aniline violet (component of)
Aniline, 2-chloro-5-nitro-
Aniline-.omega.-acid
Aniline-.omega.-sulfonic acid
Anilino-N-methanesulfonic acid
Anilinomethanesulfonic acid
Anthranilic acid, 4-nitro-
Anthraquinone Green G
Anthraquinone Green GNN
Anthraquinone, 1,2,4,5,8-pentahydroxy-
Anuvex
Aquamine Green GL
Arachidonyl carnitine
Arachidonyl-CoA
Arachidyl carnitine
Aurin
Aurine
Azorubin
BACIILUS CALMETTE GUERIN VACCINE
BBCE
BCG
BKF
Benzophenone-3
Beta-1,4-mannose-N-acetylglucosamine
Bindschedler's green
Bis(.beta.-chloroethyl) vinylphosphonate
Bis(.beta.-cyanoethyl)amine
Bis(2-chloroethyl) vinylphosphonate
Bis(2-cyanoethyl) ether
Bis(2-cyanoethyl)amine
Bis(2-hydroxy-3-tert-butyl-5-ethylphenyl)methane
Bis(3-aminopropyl)amine
Bis(cyanoethyl)amine
Bis(tridecyl) sodium sulfosuccinate
Bis-(2-chlorethyl)vinylfosfonat(CZECH)
Bu-cerumen
Bulana
Calcitroic acid
Caldine
Carmoisine
Clupanodonic acid
Clupanodonyl carnitine
Colahepat
Corallin
D-Aspartic acid
D-Glucurono-6,3-lactone
D-Lactaldehyde
D-Xylulose 1-phosphate
Debrisoquine
Di(2-cyanoethyl)amine
Di(2-hydroxy-5-methyl-3-tert-butylphenyl)methane
Dibromothymolsulfophthalein
Diethylamine, 2,2'-dicyano-
Diethylaminotrimethylenamine
Diethylcyanamide
Diisopropylcyanamide
Diisopropylcyanimide
Dimethylcyanamide
Dina acid
Dinex
Dinitro-o-cyclohexylphenol
Dinitrocyclohexylphenol
Diotilan
Dithiobiuret
Dnochp
Ecgonine
Elaidic carnitine
Ethylenecarboxamide
Eurocert Azorubine
Gamma-linolenoyl-CoA
Homocysteinesulfinic acid
IDPN
Imidodicarbonimidothioic diamide
Imidodicarbonodithioic diamide
Imino-.beta.,.beta.'-dipropionitrile
Imino-bis(3-propylamine)
Iminobis(propylamine)
Iminodipropanenitrile
Initiating explosive iminobispropylamine (DOT)
Iso-Valeraldehyde
Karmesin
L-.gamma.-Glutamylhydrazide
L-2-Amino-3-oxobutanoic acid
L-4-Hydroxyglutamate semialdehyde
L-Glutamate-.gamma.-hydrazide
L-Glutamic acid .gamma.-hydrazide
L-Glutamic acid, 5-hydrazide
L-Glutamic acid, N-(m-nitrobenzoyl)-
L-Glyceric acid
Lactonitrile
Lauryl lactate
Linoelaidyl carnitine
Linoleyl carnitine
MOB
MOD
Manoxol
Mauve
Mauveine
Methylenebisacrylamide
Methylenediacrylamide
Methylisocitric acid
N''-(2-pyridinylmethylene)thiocarbonohydrazide
N, N'-Methylenebisacrylamide
N, N'-Methylidenebisacrylamide
N, N-(Diethylamino)propylamine
N, N-Diethyl-1,3-propanediamine
N, N-Diethylcyanamide
N, N-Diethyltrimethylenediamine
N,N'-Methylenebis(acrylamide)
N,N'-Methylenediacrylamide
N,N-Bis(2-cyanoethyl)amine
N,N-Diethyl-1,3-diaminopropane
N,N-Diethylpropylenediamine
N-((acryloylamino)methyl)acrylamide
N-(3-(hydroxy(oxido)amino)benzoyl)glutamic acid
N-(3-Aminopropyl)-1, 3-propanediamine
N-(3-Diethylaminopropyl)amine
N-(Sulfomethyl)aniline
N-3-Aminopropyl-1,3-diaminopropane
N-Acetyl-D-galactosamine 1-phosphate
N-Acetyl-L-glutamate 5-semialdehyde
N-Acetyl-L-glutamyl 5-phosphate
N-Cyano-N-methylmethanamine
N-Diethyltrimethylenediamine
N-Methylpyridine-2-aldoxime iodide
N-Methylpyridinium-2-aldoxime iodide
N-Phenylsufanilic acid, barium salt
Nervonyl carnitine
N~1~,N~1~-diethyl-1,3-propanediamine
N~1~-(3-aminopropyl)-1,3-propanediamine
N~1~-(4-(dimethyl-lambda~5~-azanylidene)-2,5-cyclohexadien-1-ylidene)-N~4~,N~4~-dimethyl-1,4-benzenediamine
N~7~,N~7~-diethyl-3-imino-8-methyl-3H-phenoxazine-1,7-diamine
Oxybenzon
Oxybenzone(USAN)
P1,P4-Bis(5'-adenosyl) tetraphosphate
Pedinex(FRENCH)
Pelex OT
Pentaglutamyl folate
Peri acid
Phenazinium, 3-amino-2, 8-dimethyl-5-(4-methylphenyl)-7-(phenylamino)-, sulfate (2:1)
Phenazinium, 3-amino-7-anilino-2,8-dimethyl-5-p-tolyl-, sulfate (2:1)
Phenol, 2-cyclohexyl-4,6-dinitro-
Phenol, 4,4'-(3H-2, {1-benzoxathiol-3-ylidene)bis[2-bromo-3-methyl-6-(1-methylethyl)-,} S,S-dioxide
Phenol, 4,4'-(3H-2, {1-benzoxathiol-3-ylidene)bis[2-bromo-6-chloro-,} S,S-dioxide
Phenol, 6-cyclohexyl-2, 4-dinitro-
Phenol, {2,2'-methylenebis[6-(1,1-dimethylethyl)-4-ethyl-}
Phenol, {2,2'-methylenebis[6-(1,} 1-dimethylethyl)-4-methyl-
Phenol, {2,2'-methylenebis[6-tert-butyl-4-ethyl-}
Phenol, {4,4'-(3H-2,1-benzoxathiol-3-ylidene)bis[2,6-dibromo-,} S, S-dioxide
Phenol, {4,4'-(3H-2,1-benzoxathiol-3-ylidene)bis[2,6-dibromo-3-methyl-,} S, S-dioxide
Phenylacetyl-CoA
Phosphonic acid, ethenyl-, bis(2-chloroethyl) ester
Phosphonic acid, vinyl-, bis(2-chloroethyl) ester
Poly(acrylonitrile)
Poly(acrylonitrile), fibers
Polyacrylonitrile
Pralidoxime iodide(USAN)
Previtamin D3
Propanenitrile, 2-hydroxy-
Propanenitrile, 3,3'-iminobis-
Propanenitrile, 3,3'-oxybis-
Propanenitrile, 3-ethoxy-
Propanenitrile, {3-[(1-methylethyl)amino]-}
Propanoic acid, 2-hydroxy-, dodecyl ester
Propanolamine
Propenamide
Propionitrile, 2-hydroxy-
Propionitrile, 3, 3'-iminodi-
Propionitrile, 3,3'-oxydi-
Propionitrile, 3-(isopropylamino)-
Propionitrile, 3-ethoxy-
Propyl Red
Propyl red
Propylamine, 3,3'-iminobis-
Protopam iodide
Pseudoecgonyl-CoA
Pyridin-2-aldoxin (CZECH)
Pyridine aldoxime methiodide
Pyridine-2-aldoxime methiodide
Pyridine-2-aldoxime methyl iodide
Pyridinium, 2-formyl-1-methyl-, iodide, oxime
Pyridinium, 2-formyl-1-methyl--iodide, oxime
Pyridinium, {2-[(hydroxyimino)methyl]-1-methyl-,} iodide
Pyridinium-2-aldoxime N-methyliodide
Rapisol
Retinoyl CoA
Rosolic acid
S 67
SN 46
Schollkopf's acid (VAN)
Selenophosphate
Sialyl-Lewis X
Solbaleite
Standacol carmoisine
Stearidonic acid
Stearidonoyl CoA
Stearidonyl carnitine
Tetrabromo-m-cresolphthalein sulfone
Tetracosanoyl-CoA
Tyramine-O-sulfate
Vaccenyl carnitine
Vesuvine
Vinifos
acrylamide
anilinomethanesulfonic acid
bis(2-chloroethyl) vinylphosphonate
butyl 2-(((octyloxy)carbonyl)oxy)propanoate
cis-2-Methyl-5-isopropylhexa-2,5-dienoyl-CoA
cis-2-Methylaconitate
dIMP
dicarbonodithioimidic diamide
diethyl 2,6-dibromoheptanedioate
diethyl 2,6-dimethyl-4-oxo-4H-pyran-3,5-dicarboxylate
diethyl 2,8-dibromononanedioate
diethylcyanamide
diisopropylcyanamide
dimethylcyanamide
m-Nitro-p-toluidine
m-Toluenesulfonic acid, 6,6'-(1,4-anthraquinonylenediimino)di-, disodium salt
malic acid
p-2-Am
p-Rosolic acid
peri Acid
trans-2-Methyl-5-isopropylhexa-2,5-dienoyl-CoA
trans-3-Hexenoyl-CoA
trans-Octadec-2-enoyl-CoA
{3,3'-Iminobis[propanenitrile]}
{4-Hydroxy-3-[(4-sulfo-1-naphthalenyl)azo]-1-naphthalenesulfonic} acid, disodium salt
{Bicyclo[2.2.1]heptane-1-acetic} acid, 2-chloro-7,7-dimethyl-
{[1,1'-Binaphthalene]-8,8'-dicarboxylic} acid

  """

    Analyzer ana = new SimpleTextAnalyzer()

    Set<String> result = ana.analyze(synonyms)

    // only non-blank lines are synonyms; counting blank lines as well would
    // enlarge the denominator and understate the false-negative rate
    List<String> expected = nonBlankLines(synonyms)
    Set<String> known = new HashSet<String>(expected)

    println "false negative"

    int negTotal = expected.size()
    int neg = 0
    for (String name : expected) {
      if (!result.contains(name)) {
        neg++
        println "negative: ${name}"
      }
    }

    println "-----------"

    println "false positive"

    int total = result.size()
    int pos = 0
    println SYNONYM_QUERY_HEAD
    for (String hit : result) {
      if (!known.contains(hit)) {
        // separator goes in front of every entry but the first one
        println((pos > 0 ? "," : "") + sqlQuote(hit))
        pos++
      }
    }
    println ")"
    println "-----------"

    double falsePositiveRate = (double) pos / (double) total * 100
    double falseNegativeRate = (double) neg / (double) negTotal * 100

    println "false pos: ${falsePositiveRate}"
    println "false neg: ${falseNegativeRate}"

    assertTrue("false negative rate too high: " + falseNegativeRate + " %", falseNegativeRate < 40)
  }

  /**
   * Analyzes a paper read from a file, asserts that a set of known compound
   * names is found, then runs the result through the stop-list filter and
   * prints the filtered names as a SQL query.
   */
  void testAnalyzePaper() {

    Analyzer ana = new SimpleTextAnalyzer()

    Set<String> result = ana.analyze(new File("./test/unit/resources/paper.txt"))

    //these hits should be all in our result
    assertContainsAll(result, [
        "vitamin-A",
        "beta-carotene",
        "Bisbynin",
        "trans-luteine",
        "cholesterol",
        "Malathion",
        "Chlorpyrifos",
        "Ribosylnicotinamide",
        "Omeprazol",
        "Tegafur",
        "arginine"
    ])

    println "result size: ${result.size()}"

    File stopList = new File("./src/java/com/fiehn/filter/resources/SimpleStopListDictionary.txt")
    Set<String> filtered = FilterFactory.createStopListFilter(stopList).filter(result)

    println "filtered size: ${filtered.size()}"

    println SYNONYM_QUERY_HEAD

    for (String name : filtered) {
      println(sqlQuote(name) + ",")
    }

    // every entry above ends with a comma; this dummy entry closes the list
    println "' ');"

  }
}

